#!bin/usr/R
# Thomas J. Leeper
# Aarhus University
# Created 2012-05-10
# Updated 2012-07-26
# Updated 2013-07-26

##########################################################################
### "The Informational Basis for Mass Polarization" Analysis (Study 2) ###
##########################################################################

data <- read.csv("data-final-2012-06-08.csv")

library(car)
library(coin)
library(xtable)

source("coefpaste.r")
source("expResults.r")
.groupNames <- c("High Pro", "Low Pro", "High Mixed", "Low Mixed", "High Con", "Low Con", "Control")

# RECODING

## dv binary t1
data$dv1bin <- recode(data$dv1,"1=-1;2=-1;3=-1;4=0;5=1;6=1;7=1;else=1")
## different environment constructions
data$env <- recode(data$condition,"1=1;2=1;3=2;4=2;5=3;6=3;7=4")
data$impbin <- recode(data$condition,"1=1;2=0;3=1;4=0;5=1;6=0;7=NA")
data$imp1bin <- recode(data$dv_imp1,"1=-1;2=-1;3=0;3.5=1;4=1;5=1") # 3 was moderately important
data$imp1binA <- recode(data$dv_imp1,"1=0;2=0;3=1;3.5=1;4=1;5=1") # 3 was moderately important (coded as important, 1)
data$imp1binB <- recode(data$dv_imp1,"1=0;2=0;3=0;3.5=1;4=1;5=1") # 3 was moderately important (coded as unimportant, 0)


## NEED TO RESCALE ALL VARIABLES (0,1)
data$dv1 <- (data$dv1-4)/3
data$dv2 <- (data$dv2-4)/3
data$dv_salary <- (data$dv_salary-6)/5
data$dvchg <- data$dv2 - data$dv1
data$dv_cert1 <- (data$dv_cert1-3)/2
data$dv_cert2 <- (data$dv_cert2-3)/2
data$certchg <- (data$dv_cert2 - data$dv_cert1)/2


## CREATE SEARCH PROPORTION VARIABLES
data$prop.hc <- data$total.hc/data$total
data$prop.pro <- data$total.pro/data$total
data$prop.con <- data$total.con/data$total
data$total.bal <- data$total.pro - data$total.con
data$prop.bal <- data$prop.pro - data$prop.con


# DESCRIPTIVES (DV)
cat(coefpaste(mean(data$dv1),sd(data$dv1)/sqrt(dim(data)[1])),"\n")
cat(coefpaste(mean(data$dv2),sd(data$dv2)/sqrt(dim(data)[1])),"\n")
cat(coefpaste(mean(data$dvchg),sd(data$dvchg)/sqrt(dim(data)[1])),"\n")
cat(coefpaste(mean(data$dv_percent),sd(data$dv_percent)/sqrt(dim(data)[1])),"\n")

dv1.results <- with(data,expResults(dv1,condition))
dv2.results <- with(data,expResults(dv2,condition))
dvchg.results <- with(data,expResults(dvchg,condition))
dv_perc.results <- with(data,expResults(dv_percent,condition))

# table for appendix
means <- data.frame(cbind(mapply(coefpaste,dv1.results[,1],dv1.results[,4]),
		mapply(coefpaste,dv2.results[,1],dv1.results[,4]),
		mapply(coefpaste,dvchg.results[,1],dv1.results[,4]),
		mapply(coefpaste,dv_perc.results[,1],dv1.results[,4])))
names(means) <- c("t1","t2","t2-t1","t2 (salary %)")
xtable(means,label="tab:means2",caption="Treatment Group Opinion Means",align=c("l","r","r","r","r"))


## RESULTS

with(data,kruskal.test(dv1~condition))
with(data,kruskal.test(dv2~condition))
with(data,kruskal.test(dvchg~condition))
with(data,kruskal.test(dv_percent~condition))

with(subset(data,dv1bin==-1),kruskal.test(dv2~condition))
with(subset(data,dv1bin==1),kruskal.test(dv2~condition))

with(subset(data,dv1bin==-1),kruskal.test(dvchg~condition))
with(subset(data,dv1bin==1),kruskal.test(dvchg~condition))

with(subset(data,dv1bin==-1),kruskal.test(dv_percent~condition))
with(subset(data,dv1bin==1),kruskal.test(dv_percent~condition))


## RESULTS BY PRIOR
with(subset(data,dv1bin==-1 & condition %in% c(1,2)),independence_test(dvchg~condition))
with(subset(data,dv1bin==-1 & condition %in% c(3,4)),independence_test(dvchg~condition))
with(subset(data,dv1bin==-1 & condition %in% c(5,6)),independence_test(dvchg~condition))

with(subset(data,dv1bin==1 & condition %in% c(1,2)),independence_test(dvchg~condition))
with(subset(data,dv1bin==1 & condition %in% c(3,4)),independence_test(dvchg~condition))
with(subset(data,dv1bin==1 & condition %in% c(5,6)),independence_test(dvchg~condition))

with(subset(data,condition==1 & dv1bin %in% c(-1,1)),independence_test(dvchg~dv1bin))
with(subset(data,condition==2 & dv1bin %in% c(-1,1)),independence_test(dvchg~dv1bin))
with(subset(data,condition==3 & dv1bin %in% c(-1,1)),independence_test(dvchg~dv1bin))
with(subset(data,condition==4 & dv1bin %in% c(-1,1)),independence_test(dvchg~dv1bin))
with(subset(data,condition==5 & dv1bin %in% c(-1,1)),independence_test(dvchg~dv1bin))
with(subset(data,condition==6 & dv1bin %in% c(-1,1)),independence_test(dvchg~dv1bin))


## REGRESSION OF T2 on T1 AND TREAMENT for ONLINE APPENDIX
data$dv1temp <- recode(data$dv1bin,"-1=0;1=1;else=NA")
lm.cond <- summary(lm(dv2~ factor(condition,c(7,1:6)),data=data))
lm.nofixed <- summary(lm(dv2~ factor(condition,c(7,1:6))*dv1temp,data=data))
lm.fixedeffects1 <- summary(lm(dv2~ factor(condition,c(7,1:6))*dv1temp+factor(wave),data=data))
lm.fixedeffects2 <- summary(lm(dv2~ factor(condition,c(7,1:6))*dv1temp*factor(wave),data=data))
tab <- cbind(
		c(coefpaste(lm.cond$coef[,1],lm.cond$coef[,2]),rep("-",35)),
		c(coefpaste(lm.nofixed$coef[,1],lm.nofixed$coef[,2])[1:8],rep("-",2),
			coefpaste(lm.nofixed$coef[,1],lm.nofixed$coef[,2])[9:14],rep("-",26)),
		c(coefpaste(lm.fixedeffects1$coef[,1],lm.fixedeffects1$coef[,2]),rep("-",26)),
		coefpaste(lm.fixedeffects2$coef[,1],lm.fixedeffects2$coef[,2])
		)
rownames(tab) <- c("Intercept","High Pro", "Low Pro", "High Mixed", "Low Mixed", "High Con", "Low Con",
					"Prior Supporter", "MTurk", "Adults", 
					"Prior * High Pro", "Prior * Low Pro", "Prior * High Mixed", "Prior * Low Mixed", "Prior * High Con", "Prior * Low Con",
					"MTurk * High Pro", "MTurk * Low Pro", "MTurk * High Mixed", "MTurk * Low Mixed", "MTurk * High Con", "MTurk * Low Con",
					"Adults * High Pro", "Adults * Low Pro", "Adults * High Mixed", "Adults * Low Mixed", "Adults * High Con", "Adults * Low Con",
					"Prior * MTurk", "Prior * Adults", 
					"Prior * MTurk * High Pro", "Prior * MTurk * Low Pro", "Prior * MTurk * High Mixed",
					"Prior * MTurk * Low Mixed", "Prior * MTurk * High Con", "Prior * MTurk * Low Con",
					"Prior * Adults * High Pro", "Prior * Adults * Low Pro", "Prior * Adults * High Mixed",
					"Prior * Adults * Low Mixed", "Prior * Adults * High Con", "Prior * Adults * Low Con"
)
xtable(tab, align=c("l","r","r","r","r"))

## REGRESSION, CONTROLLING FOR SAMPLE for ONLINE APPENDIX
lm.cond <- summary(lm(dvchg~ factor(condition,c(7,1:6)),data=data))
lm.nofixed <- summary(lm(dvchg~ factor(condition,c(7,1:6))*dv1temp,data=data))
lm.fixedeffects1 <- summary(lm(dvchg~ factor(condition,c(7,1:6))*dv1temp+factor(wave),data=data))
lm.fixedeffects2 <- summary(lm(dvchg~ factor(condition,c(7,1:6))*dv1temp*factor(wave),data=data))
tab <- cbind(
		c(coefpaste(lm.cond$coef[,1],lm.cond$coef[,2]),rep("-",35)),
		c(coefpaste(lm.nofixed$coef[,1],lm.nofixed$coef[,2])[1:8],rep("-",2),
			coefpaste(lm.nofixed$coef[,1],lm.nofixed$coef[,2])[9:14],rep("-",26)),
		c(coefpaste(lm.fixedeffects1$coef[,1],lm.fixedeffects1$coef[,2]),rep("-",26)),
		coefpaste(lm.fixedeffects2$coef[,1],lm.fixedeffects2$coef[,2])
		)
rownames(tab) <- c("Intercept","High Pro", "Low Pro", "High Mixed", "Low Mixed", "High Con", "Low Con",
					"Prior Supporter", "MTurk", "Adults", 
					"Prior * High Pro", "Prior * Low Pro", "Prior * High Mixed", "Prior * Low Mixed", "Prior * High Con", "Prior * Low Con",
					"MTurk * High Pro", "MTurk * Low Pro", "MTurk * High Mixed", "MTurk * Low Mixed", "MTurk * High Con", "MTurk * Low Con",
					"Adults * High Pro", "Adults * Low Pro", "Adults * High Mixed", "Adults * Low Mixed", "Adults * High Con", "Adults * Low Con",
					"Prior * MTurk", "Prior * Adults", 
					"Prior * MTurk * High Pro", "Prior * MTurk * Low Pro", "Prior * MTurk * High Mixed",
					"Prior * MTurk * Low Mixed", "Prior * MTurk * High Con", "Prior * MTurk * Low Con",
					"Prior * Adults * High Pro", "Prior * Adults * Low Pro", "Prior * Adults * High Mixed",
					"Prior * Adults * Low Mixed", "Prior * Adults * High Con", "Prior * Adults * Low Con"
)
xtable(tab, align=c("l","r","r","r","r"))




## PLOTS
plotcon <- with(subset(data,dv1bin==-1),expResults(dvchg,condition))
plotpro <- with(subset(data,dv1bin==1),expResults(dvchg,condition))

# plot showing changes relative to control group (by prior)
relchg <- rbind(plotcon[1:6,1]-plotcon[7,1],plotpro[1:6,1]-plotpro[7,1])
relchgSE <- rbind(plotcon[1:6,4],plotpro[1:6,4])

pdf("dvchg-relative.pdf",width=10,height=6)
plotx <- c(1,1.25,1.75,2, 3,3.25,3.75,4, 5,5.25,5.75,6)
plot(plotx,as.vector(relchg),pch=23,cex=3,bg=rep(c("gray","black"),6),col=rep(c("gray","black"),6),
	ylim=c(-0.6,0.6),bty="n",xaxt="n",yaxt="n",xlab="",ylab="")
axis(1,c(1.125,1.875,3.125,3.875,5.125,5.875),c("High Pro", "Low Pro", "High Mixed", "Low Mixed", "High Con", "Low Con"),col="white")
abline(h=0,col="gray")
axis(2,las=2)
for(i in 1:length(plotx)){
	if(i %in% c(1,3,5,7,9,11)){
		segments(plotx[i],(as.vector(relchg)+as.vector(relchgSE))[i],plotx[i],(as.vector(relchg)-as.vector(relchgSE))[i],lwd=2,col="gray")
		#segments(plotx[i],(as.vector(relchg)+(2*as.vector(relchgSE)))[i],plotx[i],(as.vector(relchg)-(2*as.vector(relchgSE)))[i],lwd=1,col="gray")
	}
	else{
		segments(plotx[i],(as.vector(relchg)+as.vector(relchgSE))[i],plotx[i],(as.vector(relchg)-as.vector(relchgSE))[i],lwd=2,col="black")
		#segments(plotx[i],(as.vector(relchg)+(2*as.vector(relchgSE)))[i],plotx[i],(as.vector(relchg)-2*as.vector(relchgSE)))[i],lwd=1,col="black")
	}
}
dev.off()

if(FALSE){
	# plot showing absolute changes (by prior)
	pdf("dvchg-absolute.pdf",width=8,height=8)
	barplot(rbind(plotcon[1:6,1],plotpro[1:6,1]),
			beside=TRUE,col=rep(c("gray","black"),6),border=NA,space=rep(c(.4,0,.1,0),3),ylim=c(-2.0,1.5),axes=FALSE)
	axis(2,las=2)
	dev.off()
}


# CERTAINTY
coefpaste(round(mean(data$certchg),2),round(sd(data$certchg)/sqrt(dim(data)[1]),2))
with(data,expResults(certchg,condition))

# change in certainty (by importance) relative to control condition
cert <- recode(data$condition,"1=1;2=0;3=1;4=0;5=1;6=0;7=NA")
coefpaste(round(with(subset(data,cert==1),mean(certchg)-with(subset(data,condition==7),mean(certchg))),2),
				round(with(subset(data,cert==1),sd(certchg)/sqrt(table(cert)[2])),2))
coefpaste(round(with(subset(data,cert==0),mean(certchg)-with(subset(data,condition==7),mean(certchg))),2),
				round(with(subset(data,cert==0),sd(certchg)/sqrt(table(cert)[1])),2))
independence_test(data$certchg~cert)

certall <- with(data,expResults(certchg,condition))
certcon <- with(subset(data,dv1bin==-1),expResults(certchg,condition))
certpro <- with(subset(data,dv1bin==1),expResults(certchg,condition))
certable <- data.frame(cbind(mapply(coefpaste,certcon[,1],certcon[,4]),
							mapply(coefpaste,certpro[,1],certpro[,4])))
names(certable) <- c("t1 con","t1 pro")
xtable(certable,label="tab:cert2",caption="Change in Attitude Certainty by Treatment Condition and Prior Opinion",align=c("l","r","r"))

mapply(coefpaste,certcon[1:6,1]-certcon[7,1],certcon[1:6,4]); mapply(coefpaste,certpro[1:6,1]-certpro[7,1],certpro[1:6,4])

if(FALSE){
	# plot showing changes relative to control group (all respondents)
	pdf("certchg.pdf",width=8,height=8)
	barplot(t(certall[1:6,1]-certall[7,1]),beside=TRUE,border=NA,ylim=c(-1.5,1.5),space=c(0,.2),axes=FALSE)
	axis(2,las=2)
	dev.off()
}

# plot showing changes relative to control group (by prior)
certplot <- rbind(certcon[1:6,1]-certcon[7,1],certpro[1:6,1]-certpro[7,1])
certplotSE <- rbind(certcon[1:6,4],certpro[1:6,4])

pdf("certchg-byprior.pdf",width=10,height=6)
plotx <- c(1,1.25,1.75,2, 3,3.25,3.75,4, 5,5.25,5.75,6)
plot(plotx,as.vector(certplot),pch=23,cex=3,bg=rep(c("gray","black"),6),col=rep(c("gray","black"),6),
	bty="n",xaxt="n",yaxt="n",xlab="",ylab="",ylim=c(-0.8,0.8))
axis(1,c(1.125,1.875,3.125,3.875,5.125,5.875),c("High Pro", "Low Pro", "High Mixed", "Low Mixed", "High Con", "Low Con"),col="white")
abline(h=0,col="black")
axis(2,las=2)
for(i in 1:length(plotx)){
	if(i %in% c(1,3,5,7,9,11))
		segments(plotx[i],(as.vector(certplot)+as.vector(certplotSE))[i],plotx[i],(as.vector(certplot)-as.vector(certplotSE))[i],lwd=3,col="gray")
	else
		segments(plotx[i],(as.vector(certplot)+as.vector(certplotSE))[i],plotx[i],(as.vector(certplot)-as.vector(certplotSE))[i],lwd=3,col="black")
}
dev.off()

if(FALSE){
	# plot showing absolute changes (by prior)
	barplot(rbind(certcon[1:6,1],certpro[1:6,1]),
			beside=TRUE,col=rep(c("gray","black"),6),border=NA,space=rep(c(.4,0,.1,0),3),ylim=c(-1.5,1.5),axes=FALSE)
	axis(2,las=2)
}



# SEARCH DATA
#barplot(table(data$total),border=FALSE)
coefpaste(mean(data$total),sd(data$total)/sqrt(dim(data)[1]))
table(data$total.hc)
table(data$total.pro)
table(data$total.con)

plotcon.total <- with(subset(data,dv1bin==-1),expResults(total,condition))[1:6,]
plotcon.bal <- with(subset(data,dv1bin==-1),expResults(total.bal,condition))[1:6,]
plotcon.prop <- with(subset(data,dv1bin==-1),expResults(prop.bal,condition))[1:6,]

plotpro.total <- with(subset(data,dv1bin==1),expResults(total,condition))[1:6,]
plotpro.bal <- with(subset(data,dv1bin==1),expResults(total.bal,condition))[1:6,]
plotpro.prop <- with(subset(data,dv1bin==1),expResults(prop.bal,condition))[1:6,]

with(data,expResults(total.bal,condition))[1:6,]
with(data,expResults(prop.bal,condition))[1:6,]

with(subset(data,condition %in% 1:6),kruskal.test(total.bal~condition))
with(subset(data,condition %in% 1:6),kruskal.test(prop.bal~condition))

search1 <- data.frame(cbind(mapply(coefpaste,plotcon.bal[,1],plotcon.bal[,4]),
							mapply(coefpaste,plotpro.bal[,1],plotpro.bal[,4])))
names(search1) <- c("t1 con","t1 pro")
#xtable(search1,label="tab:search",caption="Balance of Healthcare Articles Viewed (Pro-Con)",align=c("l","r","r"))

search2 <- data.frame(cbind(mapply(coefpaste,plotcon.prop[,1],plotcon.prop[,4]),
							mapply(coefpaste,plotpro.prop[,1],plotpro.prop[,4])))
names(search2) <- c("t1 con","t1 pro")
xtable(search2,label="tab:search",caption="Balance of Healthcare Articles Viewed (Pro-Con)",align=c("l","r","r"))

# plot showing balance of pro-con hits (by prior)
pdf("search-prop.pdf",width=10,height=6)
plotx <- c(1,1.25,1.75,2, 3,3.25,3.75,4, 5,5.25,5.75,6)
searchplot <- rbind(plotcon.prop[1:6,1],plotpro.prop[1:6,1])
searchplotSE <- rbind(plotcon.prop[1:6,4],plotpro.prop[1:6,4])
plot(plotx,as.vector(searchplot),pch=23,cex=3,bg=rep(c("gray","black"),6),col=rep(c("gray","black"),6),
	bty="n",xaxt="n",yaxt="n",xlab="",ylab="",ylim=c(-0.3,0.5))
axis(1,c(1.125,1.875,3.125,3.875,5.125,5.875),c("High Pro", "Low Pro", "High Mixed", "Low Mixed", "High Con", "Low Con"),col="white")
abline(h=0,col="black")
axis(2,las=2)
for(i in 1:length(plotx)){
	if(i %in% c(1,3,5,7,9,11))
		segments(plotx[i],(as.vector(searchplot)+as.vector(searchplotSE))[i],
				plotx[i],(as.vector(searchplot)-as.vector(searchplotSE))[i],lwd=3,col="gray")
	else
		segments(plotx[i],(as.vector(searchplot)+as.vector(searchplotSE))[i],
				plotx[i],(as.vector(searchplot)-as.vector(searchplotSE))[i],lwd=3,col="black")
}
dev.off()

# IMPORTANCE EFFECTS ON SEARCH
with(subset(data,dv1bin==-1 & condition %in% c(1,2)),independence_test(prop.bal~condition))
with(subset(data,dv1bin==1 & condition %in% c(1,2)),independence_test(prop.bal~condition))

with(subset(data,dv1bin==-1 & condition %in% c(3,4)),independence_test(prop.bal~condition))
with(subset(data,dv1bin==1 & condition %in% c(3,4)),independence_test(prop.bal~condition))

with(subset(data,dv1bin==-1 & condition %in% c(5,6)),independence_test(prop.bal~condition))
with(subset(data,dv1bin==1 & condition %in% c(5,6)),independence_test(prop.bal~condition))
